import random
from datetime import timedelta

import numpy as np
import pandas as pd

# Split data into test and verification set
indicator_data = pd.read_csv('C:/Users/joepb/PycharmProjects/data_storage/indicator_data_only_2020.csv')
indicator_data['Date'] = pd.to_datetime(indicator_data['Date'])
df_timetorain = pd.read_csv('C:/Users/joepb/PycharmProjects/data_storage/time_to_rain_2020.csv')
df_timetorain['datetime'] = pd.to_datetime(df_timetorain['datetime'])
# These three columns are not used as indicators in the 2020 data set.
del indicator_data['Duck']
del indicator_data['mosquito']
del indicator_data['Other']

# NOTE(review): this second read replaces the frame loaded above, so the
# 'datetime' column is back to whatever dtype read_csv infers (the parsed
# version is discarded). Kept as-is because the file written further down
# depends on it -- confirm whether the re-read is intended.
df_timetorain = pd.read_csv('C:/Users/joepb/PycharmProjects/data_storage/time_to_rain_2020.csv')


# using 80% training and 20% test
# Draw the training rows WITHOUT replacement from the existing row labels.
# (random.randint has an inclusive upper bound and repeats values, which gave
# an out-of-range label and a training set much smaller than 80%.)
training_list = random.sample(list(indicator_data.index), round(0.8 * len(indicator_data)))
in_training = indicator_data.index.isin(training_list)
in_training_rain = df_timetorain.index.isin(training_list)
df_training = indicator_data.loc[in_training]
df_training_rain = df_timetorain.loc[in_training_rain]
df_test = indicator_data.loc[~in_training]
df_test_rain = df_timetorain.loc[~in_training_rain]

# NOTE(review): from here on `indicator_data` is a different file (no "_2020"
# suffix); its 'Date' column is not parsed here and the Duck/mosquito/Other
# columns are not removed -- confirm this switch of data set is intended.
indicator_data = pd.read_csv('C:/Users/joepb/PycharmProjects/data_storage/indicator_data_only.csv')


# NOTE(review): writes the 2020 rain table (including its row index as an extra
# first column) to the non-2020 file name -- confirm this overwrite is intended.
df_timetorain.to_csv('C:/Users/joepb/PycharmProjects/data_storage/time_to_rain.csv')


# Leave-one-out cross-validation (LOOCV) of the naive-Bayes rain forecast:
# every date is held out once, the model is fitted on all other rows and the
# held-out day is forecast and scored.
# NOTE(review): NayiveBayesClass, HK_skill_score and Accuracy are defined
# further down in this file, so they must already be defined (or be moved
# above this point) before this section is executed.

# The date arithmetic and date matching below need real timestamps. Parse the
# columns here so this section does not depend on how the frames were last
# read; parsing an already-parsed column leaves it unchanged.
indicator_data['Date'] = pd.to_datetime(indicator_data['Date'])
df_timetorain['datetime'] = pd.to_datetime(df_timetorain['datetime'])

# 2x2 contingency table of the forecasts.
A = 0  # rain forecast, rain observed
B = 0  # no rain forecast, rain observed
C = 0  # rain forecast, no rain observed
D = 0  # no rain forecast, no rain observed
output_table_NB = pd.DataFrame()
n_offsets = 4  # indicators observed 0, 1, 2 and 3 days before the forecast day

for t in indicator_data['Date']:
    is_test_day = indicator_data['Date'].isin([t])
    is_test_rain = df_timetorain['datetime'].isin([t])
    df_training = indicator_data.loc[~is_test_day]
    df_training_rain = df_timetorain.loc[~is_test_rain]
    df_test = indicator_data.loc[is_test_day]
    df_test_rain = df_timetorain.loc[is_test_rain]
    prob_naive, Results_rain = NayiveBayesClass(df_training, df_training_rain, t)

    # For offset g, row 2g holds P(indicator value | rain) and row 2g+1 holds
    # P(indicator value | no rain), for the value observed g days before t.
    df_rain_calculation = pd.DataFrame(0.0, index=np.arange(2 * n_offsets), columns=df_test.columns)
    for offset in range(n_offsets):
        observed = indicator_data.loc[indicator_data['Date'].isin([t - timedelta(days=offset)])]
        if observed.empty:
            # No observation that many days before t; the rows keep their 0.
            continue
        rain_row = 2 * offset
        norain_row = rain_row + 1
        first_prob_row = 4 * offset  # NayiveBayesClass uses 4 rows per offset
        for j in observed:
            value = observed[j].values[0]
            if value == 1:
                df_rain_calculation.loc[rain_row, j] = prob_naive.loc[first_prob_row, j]
                df_rain_calculation.loc[norain_row, j] = prob_naive.loc[first_prob_row + 2, j]
            elif value == 0:
                df_rain_calculation.loc[rain_row, j] = prob_naive.loc[first_prob_row + 1, j]
                df_rain_calculation.loc[norain_row, j] = prob_naive.loc[first_prob_row + 3, j]
    # 'Date' is not an indicator and must not take part in the products below.
    del df_rain_calculation['Date']

    n_train = len(df_training_rain)
    fraction_rain = []
    fraction_norain = []
    for offset in range(n_offsets):
        n_rain = Results_rain.iloc[offset, 0]
        # likelihood of the observed indicators times the class prior
        rain_prob = np.prod(df_rain_calculation.loc[2 * offset, :].values) * (n_rain / n_train)
        # The prior of "no rain" is (n_train - n_rain) / n_train. The previous
        # code took len() of the whole difference, which made this prior 1.
        norain_prob = np.prod(df_rain_calculation.loc[2 * offset + 1, :].values) * ((n_train - n_rain) / n_train)
        # Normalise so both fractions add up to 1. If both products are 0 the
        # fractions are NaN (numpy warns instead of raising).
        evidence = rain_prob + norain_prob
        fraction_rain.append((1 / evidence) * rain_prob)
        fraction_norain.append((1 / evidence) * norain_prob)
    fraction_rain = np.array(fraction_rain)
    fraction_norain = np.array(fraction_norain)
    total_fraction_rain = np.sum(fraction_rain)
    total_fraction_norain = np.sum(fraction_norain)

    print(fraction_rain)
    print(fraction_norain)
    output_table_NB.loc[t, 0] = total_fraction_rain
    output_table_NB.loc[t, 1] = total_fraction_norain

    # Rain is forecast when the summed rain fraction beats the no-rain one, or
    # when it exceeds 1.5 regardless.
    # (Original author's question: is this allowed?? maybe move it into a
    # decision tree.)
    # Ties and NaN totals fall through to "no rain".
    rain_forecast = (total_fraction_rain > total_fraction_norain) or (total_fraction_rain > 1.5)
    rain_observed = df_test_rain.iloc[0, 0] == 1
    if rain_forecast:
        if rain_observed:
            A += 1
        else:
            C += 1
    else:
        if rain_observed:
            B += 1
        else:
            D += 1

print([A,B,C,D])
print('HK_skill_score:', HK_skill_score(A, B, C, D))
print('Accuracy:', Accuracy(A, B, C, D))

output_table_NB.to_csv('C:/Users/joepb/PycharmProjects/data_storage/NB_predicition.csv',index=False)

#Make a loop so the HK of all 4 days is calculated
#Combine with yes/no forecasts, maybe with a decision tree?


#multiply all values in the same line
#calculate

#check skill
def HK_skill_score(A, B, C, D):
    """Return the skill score (A*D - C*B) / ((A + B) * (C + D)).

    The four arguments are the cells of a 2x2 contingency table. In this file
    the caller passes A = rain forecast and observed, B = rain observed but not
    forecast, C = rain forecast but not observed, D = neither. The name
    suggests this is the Hanssen-Kuipers score.

    Raises ZeroDivisionError for plain Python numbers when A + B or C + D is 0.
    """
    numerator = A * D - C * B
    denominator = (A + B) * (C + D)
    return numerator / denominator

def Accuracy(A,B,C,D):
    """Return the share (A + D) / (A + B + C + D) of a 2x2 contingency table.

    In this file A and D are the counts of forecasts that matched the
    observation and B and C the counts that did not.

    Raises ZeroDivisionError for plain Python numbers when all counts are 0.
    """
    matching = A + D
    everything = A + B + C + D
    return matching / everything
#,training_list
def NayiveBayesClass(ind_data,train_rain,test_date):
    """Estimate naive-Bayes conditional probabilities of every indicator.

    For each processed column of ``ind_data`` and each day offset k = 0..3 the
    function counts how often the column was 1 or 0 on rows whose k-th
    ``train_rain`` column was 1 (rain) or 0 (no rain), and divides the counts
    by the number of rain / no-rain rows for that offset.

    Rows are paired by position: row p of ``ind_data`` belongs to row p of
    ``train_rain``. (The previous implementation read ``ind_data`` by label
    and ``train_rain`` by position, so after a held-out row every later row
    was paired with the wrong rain record and the last row was never counted.)

    Parameters
    ----------
    ind_data : pandas.DataFrame
        Must have a 'Date' column that supports ``+ timedelta``. Columns are
        processed in order, up to and including a column named 'mosquito' if
        there is one; later columns are ignored. 'Date' itself is processed
        like any other column (with datetime values it never equals 1 or 0,
        so all its counts are 0).
    train_rain : pandas.DataFrame
        Its first four columns (by position) hold the 1/0 rain flag for
        offset 0, 1, 2 and 3.
    test_date
        A row is left out of the counts for offset k when its
        ``Date + k days`` equals ``test_date``.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``df_nayive`` has index 0..15 and one column per processed column.
        For offset k:
            row 4k     = count(column == 1 and rain)    / total rain
            row 4k + 1 = count(column == 0 and rain)    / total rain
            row 4k + 2 = count(column == 1 and no rain) / (len(train_rain) - total rain)
            row 4k + 3 = count(column == 0 and no rain) / (len(train_rain) - total rain)
        A zero denominator yields NaN (or inf) instead of raising.
        ``Results_rain`` has index 0..3 and one integer column 'Total rain'
        with the number of counted rain rows per offset.

    Raises
    ------
    KeyError
        If ``ind_data`` has no 'Date' column.
    IndexError
        If ``train_rain`` has fewer rows than ``ind_data`` (or fewer than
        four columns).
    """
    n_offsets = 4
    n_rows = len(ind_data)
    if len(train_rain) < n_rows:
        raise IndexError('train_rain has fewer rows than ind_data')

    # Columns to evaluate: everything up to and including 'mosquito'.
    columns = []
    for name in ind_data.columns:
        columns.append(name)
        if name == 'mosquito':
            break

    # Boolean masks per column, computed once and reused for every offset.
    is_one = {name: (ind_data[name] == 1).to_numpy() for name in columns}
    is_zero = {name: (ind_data[name] == 0).to_numpy() for name in columns}

    dates = ind_data['Date']
    total_rain = np.zeros(n_offsets, dtype=int)
    table = {name: np.zeros(4 * n_offsets) for name in columns}

    for offset in range(n_offsets):
        # Leave out rows whose target day is the test date itself.
        usable = ~((dates + timedelta(days=offset)) == test_date).to_numpy()
        outcome = train_rain.iloc[:n_rows, offset]
        wet = (outcome == 1).to_numpy() & usable
        dry = (outcome == 0).to_numpy() & usable

        n_wet = wet.sum()
        # NOTE(review): as before, the no-rain denominator is "all rows minus
        # counted rain rows", so it also contains the left-out rows and rows
        # whose flag is neither 0 nor 1 -- confirm this is intended.
        n_dry = len(train_rain) - n_wet
        total_rain[offset] = n_wet

        first = 4 * offset
        for name in columns:
            wet_counts = np.array([(is_one[name] & wet).sum(), (is_zero[name] & wet).sum()], dtype=float)
            dry_counts = np.array([(is_one[name] & dry).sum(), (is_zero[name] & dry).sum()], dtype=float)
            # A class that never occurs gives 0/0; keep that as NaN quietly.
            with np.errstate(divide='ignore', invalid='ignore'):
                table[name][first:first + 2] = wet_counts / n_wet
                table[name][first + 2:first + 4] = dry_counts / n_dry

    df_nayive = pd.DataFrame(table, index=np.arange(4 * n_offsets))
    Results_rain = pd.DataFrame({'Total rain': total_rain}, index=np.arange(n_offsets)).astype(int)
    return df_nayive, Results_rain



#1 check for every indicator if it is 1 or 0
#2 check probability of rain on all timesteps
#3 Multiply all yes prob
#4 multiply all no prob
#5 calculate final yes and no prob.





